# Author: Ken Furudate
import scanpy as sc
import squidpy as sq
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import matplotlib.font_manager
# Matplotlib defaults for every figure below: put Arial first in the sans-serif
# fallback list, use a 20 pt base font size, and set the PDF font type to 42.
plt.rcParams.update({
    'font.sans-serif': ['Arial'] + plt.rcParams['font.sans-serif'],
    "font.size": 20,
    'pdf.fonttype': 42,
})
import os
from pathlib import Path
import pickle
def pickle_load(path):
    """Load and return the object stored in the pickle file at ``path``.

    Args:
        path: Location of the pickle file; it is passed straight to ``open``.

    Returns:
        The object reconstructed by ``pickle.load``.
    """
    # NOTE: unpickling can execute arbitrary code, so only load trusted files.
    with open(path, mode='rb') as f:
        return pickle.load(f)
import seaborn as sns
# Widen the notebook's main container to 80% of the browser window.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

# Silence every warning category for the rest of the session to keep the cell
# output readable.  NOTE: this also hides deprecation warnings.
import warnings
warnings.filterwarnings('ignore')

# Print the package versions used for this run.
sc.logging.print_header()
print(f"squidpy=={sq.__version__}")
# Directory and file name of the integrated .h5ad file.  `datadir` ends with a
# slash because later paths are built by plain string concatenation.
datadir = "/data/spatial/"
in_f = "integrated_data.h5ad"

# Read the file at "<datadir><in_f>" and echo the loaded object.
data = sc.read_h5ad(f"{datadir}{in_f}")
data
# Keep, for each sample, only the observations whose category is "tumor".
# The sample and category filters are combined into one boolean mask and the
# result is materialised with .copy(): the later `.obs[...] = ...` and
# `.uns[...] = ...` assignments would otherwise write into a view of `data`
# and rely on implicit view-to-copy conversion.
is_tumor = data.obs['category'] == "tumor"
adata = data[(data.obs['sample'] == "A") & is_tumor].copy()
bdata = data[(data.obs['sample'] == "B") & is_tumor].copy()
cdata = data[(data.obs['sample'] == "C") & is_tumor].copy()
# Label every observation of samples A and B with a region derived from its
# Leiden cluster id (compared as a string).  Cluster ids that are not listed
# keep their id as the label, exactly as with one `replace` call per cluster.
region_by_cluster = {
    **dict.fromkeys(["0", "2", "11"], "Predominant_pri"),
    **dict.fromkeys(
        ["1", "3", "4", "5", "6", "7", "8", "9", "10"], "Metastatic_pri"
    ),
}
for sample_data in (adata, bdata):
    # One pass over the column instead of 12 in-place whole-frame replacements.
    sample_data.obs["region"] = (
        sample_data.obs["leiden"].astype(str).replace(region_by_cluster)
    )
# Attach the pickled `uns["spatial"]` entry of each sample to its subset.
adata.uns["spatial"] = pickle_load(datadir + 'adata_uns_spatial.pickle')
bdata.uns["spatial"] = pickle_load(datadir + 'bdata_uns_spatial.pickle')
cdata.uns["spatial"] = pickle_load(datadir + 'cdata_uns_spatial.pickle')
# NOTE(review): the original also assigned 'ddata_uns_spatial.pickle' to
# `ddata.uns["spatial"]`, but no `ddata` is defined in this script (only the
# A, B and C subsets exist), so that statement raised NameError.  It is left
# out here; restore it once a matching `ddata` object is created.
sample_lst = ["A", "B", "C"]
def set_param(input_data, sample):
    """Build and show the hires image container for one sample.

    Args:
        input_data: Object whose ``uns['spatial'][sample]`` entry provides
            ``['scalefactors']['tissue_hires_scalef']`` and
            ``['images']['hires']``.
        sample: Key into ``uns['spatial']``; its string form is also used as
            the container's ``library_id``.

    Returns:
        Tuple ``(scale, img)``: the hires scale factor and the
        ``sq.im.ImageContainer`` built from the hires image.
    """
    library_id = f"{sample}"
    spatial_entry = input_data.uns['spatial'][library_id]
    scale = spatial_entry['scalefactors']['tissue_hires_scalef']
    img = sq.im.ImageContainer(
        spatial_entry['images']['hires'],
        scale=scale,
        library_id=library_id,
    )
    # Render the image immediately so it appears in the notebook output.
    img.show()
    return scale, img
# Build (and show) the hires image container of each sample.
scale_a, img_a = set_param(input_data=adata, sample="A")
scale_b, img_b = set_param(input_data=bdata, sample="B")
scale_c, img_c = set_param(input_data=cdata, sample="C")

# Spatial plots of samples A and B, coloured by the same two keys, with
# `na_in_legend` switched off.
for sample_data in (adata, bdata):
    sc.pl.spatial(
        adata=sample_data,
        color=["integrated spatial transcriptome cluster", 'integration_analysis'],
        na_in_legend=False,
    )
# Read the tab-separated Fig. 3b table of each sample (paths are relative to
# the working directory), then echo each table.
count_a = pd.read_csv("Fig.3b_SampleA.txt", sep="\t")
count_b = pd.read_csv("Fig.3b_SampleB.txt", sep="\t")
count_c = pd.read_csv("Fig.3b_SampleC.txt", sep="\t")
count_a
count_b
count_c
# For each sample: take that sample's rows of `data.obs`, expose the obs index
# as an "Unnamed: 0" column, and left-join the result onto the sample's count
# table so that every row of the count table is kept.
sampleA_ = data.obs.loc[data.obs["sample"] == "A"].assign(
    **{"Unnamed: 0": lambda obs: obs.index}
)
sampleA_
data_a_merge = count_a.merge(sampleA_, on="Unnamed: 0", how='left')
data_a_merge

sampleB_ = data.obs.loc[data.obs["sample"] == "B"].assign(
    **{"Unnamed: 0": lambda obs: obs.index}
)
sampleB_
data_b_merge = count_b.merge(sampleB_, on="Unnamed: 0", how='left')
data_b_merge

sampleC_ = data.obs.loc[data.obs["sample"] == "C"].assign(
    **{"Unnamed: 0": lambda obs: obs.index}
)
sampleC_
data_c_merge = count_c.merge(sampleC_, on="Unnamed: 0", how='left')
data_c_merge

# Only the merged tables of samples A and B are stacked for the analysis below.
data_ab_merge = pd.concat([data_a_merge, data_b_merge])
data_ab_merge
# Count columns used in the colocalization analysis.
cell_type = ["OSCC cell", "CAF", "MAF"]

# Keep those columns plus the Leiden cluster, with a fresh 0..n-1 index (the
# concatenated A+B table carries each sample's own row numbers).
analysis_df = data_ab_merge.loc[:, cell_type + ["leiden"]].reset_index(drop=True)
analysis_df
# Remove non-OSCC cell
# A row is dropped when its "OSCC cell" value is 0 after integer truncation
# (the same criterion as `int(value) == 0`).  The mask is computed in one
# vectorised step and rows are dropped by their index labels, so the result
# does not depend on the index being 0..n-1.
oscc_count = analysis_df["OSCC cell"].astype(int)
drop_idx = analysis_df.index[oscc_count == 0].tolist()
input_df = analysis_df.drop(index=drop_idx).reset_index(drop=True)
input_df
# Derive the "area" label of every row from its Leiden cluster id: clusters
# 0, 2 and 11 are "Predominant_pri"; every other cluster is "Metastatic_pri".
predominant_clusters = (0, 2, 11)
cond_lst = [
    "Predominant_pri" if int(clu) in predominant_clusters else "Metastatic_pri"
    for clu in input_df["leiden"]
]
input_df["area"] = cond_lst
input_df
# Cap the count columns: entries below 1 are kept as they are, every other
# entry becomes 1.
cell_counts = input_df[cell_type]
input_df[cell_type] = cell_counts.where(cell_counts < 1, other=1)
input_df

# Row-wise sum of the capped columns, stored as the "colocalization" score.
input_df["colocalization"] = input_df.loc[:, cell_type].sum(axis=1)
input_df
select_col1 = "colocalization"
select_col2 = "area"
input_df2 = input_df[[select_col1, select_col2]]
input_df2

# One-hot encoding
# An empty prefix and separator make each dummy column carry the area value
# itself as its name.  This replaces a positional rename of the columns, which
# depended on the order produced by `get_dummies` and failed when one of the
# two areas was absent.
one_hot_df = pd.get_dummies(
    input_df2, columns=[select_col2], prefix="", prefix_sep=""
)
one_hot_df

# Number of rows per area for each colocalization score.
one_hot_df = one_hot_df.groupby(by="colocalization").sum()
one_hot_df

# Fixed column order for plotting; an area with no rows becomes a zero column.
one_hot_df2 = one_hot_df.reindex(
    columns=['Predominant_pri', 'Metastatic_pri'], fill_value=0
)
one_hot_df2
def _plot_colocalization_bars(table, ylabel):
    """Draw one grouped bar chart of ``table`` with the shared figure styling.

    Args:
        table: DataFrame to plot with ``DataFrame.plot.bar``; one bar group per
            row, one colour per column.
        ylabel: Text used as the y-axis label.
    """
    sns.set(font_scale=2, style='white')
    table.plot.bar(color=['#377EB8', '#E41A1C'])
    # Anchor the legend outside the axes, to the right.
    plt.legend(fontsize=20, loc='upper right', bbox_to_anchor=(1.3, 1))
    plt.xticks(
        ticks=[0, 1, 2],
        labels=[
            'OSCC cell alone',
            'moderate \n colocalization',
            'high colocalization',
        ],
        rotation=60,
    )
    plt.xlabel('')
    plt.ylabel(ylabel)
    sns.despine()
    plt.show()


# Absolute counts per colocalization level and area.
_plot_colocalization_bars(one_hot_df2, 'Number of cells \n present per spot')

# Each column divided by its own total, shown first as fractions and then
# plotted as percentages.
area_fraction = one_hot_df2 / one_hot_df2.sum(axis=0)
area_fraction
_plot_colocalization_bars(
    area_fraction * 100, 'Percentage of cells \n present per spot [%]'
)
one_hot_df2
# Fisher's exact test on the colocalization-by-area count table.
# The original statement, `fisher.test(one_hot_df2)`, is R syntax and raises
# NameError in Python, so SciPy's implementation is used instead.
# NOTE(review): tables larger than 2x2 need SciPy >= 1.15; older releases
# reject them with ValueError.  Confirm the installed version, and check the
# p-value against R's `fisher.test` if exact agreement matters.
from scipy.stats import fisher_exact
fisher_res = fisher_exact(one_hot_df2.to_numpy())
print(f"Fisher's exact test p-value: {fisher_res.pvalue}")
fisher_res